The data set we are going to use contains specifications for every Porsche 911 model since its debut in 1964.
The Porsche 911, or simply the 911 (pronounced “Nine-Eleven”), is a famous and iconic sports car produced by Porsche AG, and it has become one of the most recognizable and enduring sports car designs in the world.
With almost 60 years of racing lineage, the 911 has gone through several changes in order to maintain its status as the symbol of a high-performance sports car.
# New data frame
# Keep only the eight specification columns used in the analysis below.
# data.frame() rewrites non-syntactic column names, so the "-" and "/" in the
# selected names appear as "." in porsche911 (e.g. "acceleration_0.100km.h").
analysis_columns <- c(
  "generation",
  "engine",
  "start_of_production",
  "maximum_speed",
  "acceleration_0-100km/h",
  "weight-to-power_ratio",
  "power",
  "max_weight"
)
porsche911 <- data.frame(subset(porsche_911, select = analysis_columns))
# CLEANING
# Rename
# Map each current column name to its short analysis name. The renames are
# applied one at a time, in order; a name that is not present in porsche911
# simply matches nothing and is skipped.
column_renames <- c(
  "generation" = "Generation",
  "engine" = "Engine",
  "start_of_production" = "Year",
  "maximum_speed" = "MaxSpeed",
  "acceleration_0.100km.h" = "ZeroToHundred",
  "weight.to.power_ratio" = "WPR",
  "power" = "Power",
  "max_weight" = "MaxWeight"
)
for (old_name in names(column_renames)) {
  is_old_name <- colnames(porsche911) == old_name
  colnames(porsche911)[is_old_name] <- column_renames[[old_name]]
}
# OMIT
# Drop every row that has a missing value in any of the selected columns.
# NOTE(review): this runs before the numeric conversion below, so a value
# that fails to parse there would remain in the data as NA.
porsche911 <- na.omit(porsche911)

# Getting rid of written measurements
# Each pattern removes its marker and everything that follows it.
trailing_text <- c(
  WPR = "kg/Hp.*",
  Power = "Hp.*",
  ZeroToHundred = "-.*"
)
for (column in names(trailing_text)) {
  porsche911[[column]] <- gsub(trailing_text[[column]], "", porsche911[[column]])
}

# Change String columns to Numeric
# MaxWeight is not converted here; it is used as it was read.
numeric_columns <- c("Year", "MaxSpeed", "ZeroToHundred", "WPR", "Power")
porsche911[numeric_columns] <- lapply(porsche911[numeric_columns], as.numeric)
# Fit MaxSpeed as a linear function of Year and print the model summary.
max_speed_by_year <- lm(formula = porsche911$MaxSpeed ~ porsche911$Year)
summary(max_speed_by_year)
##
## Call:
## lm(formula = porsche911$MaxSpeed ~ porsche911$Year)
##
## Residuals:
## Min 1Q Median 3Q Max
## -76.192 -7.518 -0.553 5.552 44.040
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3092.9596 135.4189 -22.84 <2e-16 ***
## porsche911$Year 1.6860 0.0675 24.98 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.39 on 233 degrees of freedom
## Multiple R-squared: 0.7281, Adjusted R-squared: 0.7269
## F-statistic: 624 on 1 and 233 DF, p-value: < 2.2e-16
# Interactive scatter plot of MaxSpeed against Year, colored by Generation;
# Engine is mapped to the text aesthetic.
max_speed_plot <- ggplot(
  porsche911,
  aes(x = Year, y = MaxSpeed, color = Generation, text = Engine)
) +
  geom_point(alpha = 0.625) +
  labs(
    title = "Maximum Speed of the Porsche 911 models Over Time",
    x = "Year",
    y = "Maximum Speed(Km/h)"
  )
ggplotly(max_speed_plot)
# Fit Power as a linear function of Year and print the model summary.
power_by_year <- lm(formula = porsche911$Power ~ porsche911$Year)
summary(power_by_year)
##
## Call:
## lm(formula = porsche911$Power ~ porsche911$Year)
##
## Residuals:
## Min 1Q Median 3Q Max
## -100.543 -29.959 -14.076 7.681 246.809
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.260e+04 5.967e+02 -21.11 <2e-16 ***
## porsche911$Year 6.470e+00 2.974e-01 21.76 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 58.99 on 233 degrees of freedom
## Multiple R-squared: 0.6701, Adjusted R-squared: 0.6687
## F-statistic: 473.3 on 1 and 233 DF, p-value: < 2.2e-16
# Interactive scatter plot of Power against Year, colored by Engine;
# Generation is mapped to the text aesthetic.
power_plot <- ggplot(
  porsche911,
  aes(x = Year, y = Power, color = Engine, text = Generation)
) +
  geom_point(alpha = 0.625) +
  labs(
    title = "Total Horsepower of the Porsche 911 models Over Time",
    x = "Year",
    y = "Horsepower (Hp)"
  )
ggplotly(power_plot)
# Fit MaxSpeed on Power and Year together and print the model summary.
max_speed_by_power_year <- lm(
  formula = porsche911$MaxSpeed ~ porsche911$Power + porsche911$Year
)
summary(max_speed_by_power_year)
##
## Call:
## lm(formula = porsche911$MaxSpeed ~ porsche911$Power + porsche911$Year)
##
## Residuals:
## Min 1Q Median 3Q Max
## -75.267 -4.089 0.771 4.168 44.023
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -989.18347 156.85250 -6.306 1.43e-09 ***
## porsche911$Power 0.16700 0.01009 16.551 < 2e-16 ***
## porsche911$Year 0.60549 0.07975 7.592 7.64e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.085 on 232 degrees of freedom
## Multiple R-squared: 0.8753, Adjusted R-squared: 0.8742
## F-statistic: 814.4 on 2 and 232 DF, p-value: < 2.2e-16
# Fit MaxWeight as a linear function of Year and print the model summary.
max_weight_by_year <- lm(formula = porsche911$MaxWeight ~ porsche911$Year)
summary(max_weight_by_year)
##
## Call:
## lm(formula = porsche911$MaxWeight ~ porsche911$Year)
##
## Residuals:
## Min 1Q Median 3Q Max
## -337.31 -51.95 17.25 59.38 223.86
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.968e+04 9.292e+02 -21.18 <2e-16 ***
## porsche911$Year 1.073e+01 4.632e-01 23.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 91.86 on 233 degrees of freedom
## Multiple R-squared: 0.6975, Adjusted R-squared: 0.6962
## F-statistic: 537.2 on 1 and 233 DF, p-value: < 2.2e-16
# Interactive scatter plot of MaxWeight against Year, colored by Engine;
# Generation is mapped to the text aesthetic.
# The y-axis label used to read "Horsepower (Hp)" (carried over from the power
# plot) although the axis shows MaxWeight; it now names the plotted variable.
# NOTE(review): the kg unit is assumed from the "kg/Hp" weight-to-power
# values -- confirm against the source data.
ggplotly(ggplot(porsche911, aes(x = Year, y = MaxWeight, color = Engine, text = Generation)) +
  geom_point(alpha = 5/8) +
  labs(x = "Year", y = "Maximum Weight (kg)") +
  ggtitle("Total Weight of the Porsche 911 models Over Time"))
# Fit ZeroToHundred on WPR and Year together and print the model summary.
zero_to_hundred_by_wpr_year <- lm(
  formula = porsche911$ZeroToHundred ~ porsche911$WPR + porsche911$Year
)
summary(zero_to_hundred_by_wpr_year)
##
## Call:
## lm(formula = porsche911$ZeroToHundred ~ porsche911$WPR + porsche911$Year)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.92440 -0.24419 -0.02088 0.25601 0.83703
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.38346 5.92908 5.799 2.17e-08 ***
## porsche911$WPR 0.92588 0.03884 23.836 < 2e-16 ***
## porsche911$Year -0.01666 0.00289 -5.763 2.62e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3275 on 232 degrees of freedom
## Multiple R-squared: 0.9161, Adjusted R-squared: 0.9154
## F-statistic: 1267 on 2 and 232 DF, p-value: < 2.2e-16
# Interactive scatter plot of ZeroToHundred against WPR, colored by
# Generation; Engine is mapped to the text aesthetic.
zero_to_hundred_plot <- ggplot(
  porsche911,
  aes(x = WPR, y = ZeroToHundred, color = Generation, text = Engine)
) +
  geom_point(alpha = 0.625) +
  labs(
    title = "0-100Km/h Times of the 911 w/ Respect to Weight to Power Ratio(Lower = Better)",
    x = "Weight to Power Ratio",
    y = "0-100Km/h Time (Seconds)"
  )
ggplotly(zero_to_hundred_plot)